{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Before training\n",
    "\n",
    "This program saves the last 3 generations of models to Google Drive. Since 1 generation of models is >1GB, you should have at least 3GB of free space in Google Drive. If you do not have such free space, it is recommended to create another Google Account."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Installation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @title Check GPU\n",
    "!nvidia-smi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @title Mount Google Drive\n",
    "from google.colab import drive\n",
    "\n",
    "drive.mount(\"/content/drive\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @title Install dependencies\n",
    "# @markdown pip may fail to resolve dependencies and raise ERROR, but it can be ignored.\n",
    "!python -m pip install -U pip wheel\n",
    "%pip install -U ipython\n",
    "\n",
    "# @markdown Branch (for development)\n",
    "BRANCH = \"none\"  # @param {\"type\": \"string\"}\n",
    "if BRANCH == \"none\":\n",
    "    %pip install -U so-vits-svc-fork\n",
    "else:\n",
    "    %pip install -U git+https://github.com/34j/so-vits-svc-fork.git@{BRANCH}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @title Make dataset directory\n",
    "!mkdir -p \"dataset_raw\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @title Copy your dataset\n",
    "# @markdown **We assume that your dataset is in your Google Drive's `so-vits-svc-fork/dataset/(speaker_name)` directory.**\n",
    "DATASET_NAME = \"kiritan\"  # @param {type: \"string\"}\n",
    "!cp -R /content/drive/MyDrive/so-vits-svc-fork/dataset/{DATASET_NAME}/ -t \"dataset_raw/\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @title Download dataset (Tsukuyomi-chan JVS)\n",
    "# @markdown You can download this dataset if you don't have your own dataset.\n",
    "# @markdown Make sure you agree to the license when using this dataset.\n",
    "# @markdown https://tyc.rei-yumesaki.net/material/corpus/#toc6\n",
    "# !wget -N https://tyc.rei-yumesaki.net/files/voice/tyc-corpus1.zip\n",
    "# !unzip -O sjis tyc-corpus1.zip\n",
    "# !mv \"/content/つくよみちゃんコーパス Vol.1 声優統計コーパス（JVSコーパス準拠）/おまけ：WAV（+12dB増幅＆高音域削減）/WAV（+12dB増幅＆高音域削減）\" \"dataset_raw/tsukuyomi\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @title Automatic preprocessing\n",
    "!svc pre-resample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!svc pre-config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "F0_METHOD = \"dio\"  # @param [\"crepe\", \"crepe-tiny\", \"parselmouth\", \"dio\", \"harvest\"]\n",
    "!svc pre-hubert -fm {F0_METHOD}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @title Train\n",
    "%load_ext tensorboard\n",
    "%tensorboard --logdir drive/MyDrive/so-vits-svc-fork/logs/44k"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!svc train --model-path drive/MyDrive/so-vits-svc-fork/logs/44k"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Training Cluster model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!svc train-cluster --output-path drive/MyDrive/so-vits-svc-fork/logs/44k/kmeans.pt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @title Get the author's voice as a source\n",
    "import random\n",
    "\n",
    "NAME = str(random.randint(1, 49))\n",
    "TYPE = \"fsd50k\"  # @param [\"\", \"digit\", \"dog\", \"fsd50k\"]\n",
    "CUSTOM_FILEPATH = \"\"  # @param {type: \"string\"}\n",
    "if CUSTOM_FILEPATH != \"\":\n",
    "    NAME = CUSTOM_FILEPATH\n",
    "else:\n",
    "    # it is extremely difficult to find a voice that can download from the internet directly\n",
    "    if TYPE == \"dog\":\n",
    "        !wget -N f\"https://huggingface.co/datasets/437aewuh/dog-dataset/resolve/main/dogs/dogs_{NAME:.0000}.wav\" -O {NAME}.wav\n",
    "    elif TYPE == \"digit\":\n",
    "        # george, jackson, lucas, nicolas, ...\n",
    "        !wget -N f\"https://github.com/Jakobovski/free-spoken-digit-dataset/raw/master/recordings/0_george_{NAME}.wav\" -O {NAME}.wav\n",
    "    elif TYPE == \"fsd50k\":\n",
    "        !wget -N f\"https://huggingface.co/datasets/Fhrozen/FSD50k/blob/main/clips/dev/{10000+int(NAME)}.wav\" -O {NAME}.wav\n",
    "    else:\n",
    "        !wget -N f\"https://zunko.jp/sozai/utau/voice_{\"kiritan\" if NAME < 25 else \"itako\"}{NAME % 5 + 1}.wav\" -O {NAME}.wav\n",
    "from IPython.display import Audio, display\n",
    "\n",
    "display(Audio(f\"{NAME}.wav\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @title Use trained model\n",
    "# @markdown **Put your .wav file in `so-vits-svc-fork/audio` directory**\n",
    "from IPython.display import Audio, display\n",
    "\n",
    "!svc infer drive/MyDrive/so-vits-svc-fork/audio/{NAME}.wav -m drive/MyDrive/so-vits-svc-fork/logs/44k/ -c drive/MyDrive/so-vits-svc-fork/logs/44k/config.json\n",
    "display(Audio(f\"drive/MyDrive/so-vits-svc-fork/audio/{NAME}.out.wav\", autoplay=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "##@title Use trained model (with cluster)\n",
    "!svc infer {NAME}.wav -s speaker -r 0.1 -m drive/MyDrive/so-vits-svc-fork/logs/44k/ -c drive/MyDrive/so-vits-svc-fork/logs/44k/config.json -k drive/MyDrive/so-vits-svc-fork/logs/44k/kmeans.pt\n",
    "display(Audio(f\"{NAME}.out.wav\", autoplay=True))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Pretrained models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @title https://huggingface.co/TachibanaKimika/so-vits-svc-4.0-models/tree/main\n",
    "!wget -N \"https://huggingface.co/TachibanaKimika/so-vits-svc-4.0-models/resolve/main/riri/G_riri_220.pth\"\n",
    "!wget -N \"https://huggingface.co/TachibanaKimika/so-vits-svc-4.0-models/resolve/main/riri/config.json\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!svc infer {NAME}.wav -c config.json -m G_riri_220.pth\n",
    "display(Audio(f\"{NAME}.out.wav\", autoplay=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @title https://huggingface.co/therealvul/so-vits-svc-4.0/tree/main\n",
    "!wget -N \"https://huggingface.co/therealvul/so-vits-svc-4.0/resolve/main/Pinkie%20(speaking%20sep)/G_166400.pth\"\n",
    "!wget -N \"https://huggingface.co/therealvul/so-vits-svc-4.0/resolve/main/Pinkie%20(speaking%20sep)/config.json\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!svc infer {NAME}.wav --speaker \"Pinkie {neutral}\" -c config.json -m G_166400.pth\n",
    "display(Audio(f\"{NAME}.out.wav\", autoplay=True))"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "provenance": []
  },
  "gpuClass": "standard",
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
